# Load the city data set. The file has a header row, uses ';' as the field
# separator and ',' as the decimal mark.
podatki <- read.table(
  file = "/cloud/project/Poglavje 1/Naloga 1/Mesta.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

# Inspect the structure of the imported data frame (recorded output below).
str(podatki)
## 'data.frame': 43 obs. of 4 variables:
## $ Mesto : chr "Amsterdam" "Atene" "Barcelona" "Beograd" ...
## $ Delo : num 30.4 41.7 37.5 42.6 34.8 40.1 37.2 39.5 39.8 36.5 ...
## $ Cene : int 139 99 109 70 120 85 130 75 71 142 ...
## $ Dohodek: int 148 75 103 65 158 103 148 94 86 129 ...
# Opis spremenljivk:
# Analysis data: every column except the first one (Mesto, the city name),
# leaving Delo, Cene and Dohodek.
podatki_MGK <- podatki[-1]

# Descriptive statistics with the "basic" block of stat.desc() switched off,
# rounded to two decimals (recorded output below).
library(pastecs)
round(
  stat.desc(podatki_MGK, basic = FALSE),
  digits = 2
)
## Delo Cene Dohodek
## median 37.40 125.00 115.00
## mean 37.55 120.88 120.58
## SE.mean 0.38 5.60 5.65
## CI.mean.0.95 0.77 11.30 11.41
## var 6.20 1348.58 1374.34
## std.dev 2.49 36.72 37.07
## coef.var 0.07 0.30 0.31
# Correlation matrix of the analysis variables (cor() with its default
# method), kept in R for the adequacy checks that follow.
R <- cor(x = podatki_MGK)

# Show the matrix rounded to three decimals (recorded output below).
round(R, digits = 3)
## Delo Cene Dohodek
## Delo 1.000 -0.709 -0.761
## Cene -0.709 1.000 0.750
## Dohodek -0.761 0.750 1.000
# Bartlett's test on the correlation matrix R; the sample size is the number
# of rows in the imported data (recorded output below).
library(psych)
cortest.bartlett(
  R = R,
  n = nrow(podatki)
)
## $chisq
## [1] 72.38292
##
## $p.value
## [1] 1.317917e-15
##
## $df
## [1] 3
# Kaiser-Meyer-Olkin measure of sampling adequacy, overall and per variable,
# computed from the correlation matrix R (recorded output below).
library(psych)
KMO(r = R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA = 0.75
## MSA for each item =
## Delo Cene Dohodek
## 0.76 0.77 0.72
# Principal component analysis on the variables scaled to unit variance
# (scale.unit = TRUE); the built-in plots are suppressed.
library(FactoMineR)
mgk <- PCA(
  X = podatki_MGK,
  scale.unit = TRUE,
  graph = FALSE
)

# Eigenvalues with the share and cumulative share of explained variance
# (recorded output below).
library(factoextra)
get_eigenvalue(mgk)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.4803702 82.679006 82.67901
## Dim.2 0.2914122 9.713740 92.39275
## Dim.3 0.2282176 7.607255 100.00000
# Scree plot of the eigenvalues of the fitted PCA, with value labels on the
# bars. Title and axis labels are kept in Slovenian as in the original.
library(factoextra)
fviz_eig(
  mgk,
  choice = "eigenvalue",
  addlabels = TRUE,
  main = "Diagram lastnih vrednosti",
  xlab = "Glavna komponenta",
  ylab = "Lastna vrednost"
)
# Parallel analysis for the number of principal components (fa = "pc").
# With sim = FALSE the reference eigenvalues come from random resampling of
# the observed data, so the result can change from run to run. Fix the RNG
# seed first to make the analysis reproducible.
library(psych)
set.seed(1)
fa.parallel(
  podatki_MGK,
  sim = FALSE,
  fa = "pc"
)
# Recorded output of the original (unseeded) run:
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
# Refit the PCA on the scaled variables, this time keeping only one
# component in the results (ncp = 1).
library(FactoMineR)
mgk <- PCA(
  X = podatki_MGK,
  scale.unit = TRUE,
  ncp = 1,
  graph = FALSE
)

# Correlations between the variables and the retained component
# (recorded output below).
print(mgk[["var"]][["cor"]])
## Delo Cene Dohodek
## 0.9055009 -0.9005352 -0.9216695

# Contributions of the variables to the retained component
# (recorded output below).
print(mgk[["var"]][["contrib"]])
## Delo Cene Dohodek
## 33.05684 32.69527 34.24790
# Store each city's coordinate on the first principal component as a new
# column GK1 of the original data frame.
podatki$GK1 <- mgk[["ind"]][["coord"]][, 1]

# Preview the first three rows (recorded output below).
head(podatki, n = 3)
## Mesto Delo Cene Dohodek GK1
## 1 Amsterdam 30.4 139 148 -2.3937464
## 2 Atene 41.7 99 75 2.0419237
## 3 Barcelona 37.5 109 103 0.4560978
# Bar chart of the first-component score (GK1) for every city. The bar
# heights are the GK1 values themselves, and the city labels on the x axis
# are rotated by 90 degrees.
library(ggplot2)
ggplot(data = podatki, mapping = aes(x = Mesto, y = GK1)) +
  geom_col() +
  theme_linedraw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )